%matplotlib inline
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'retina'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from math import pi
# Load the raw player table; every later cell works on this frame.
data_path = "data.csv"
df = pd.read_csv(data_path)

# Notebook-style inspection. These bare expressions only render output when
# they are the last statement of a notebook cell.
df.columns
df.head()
df.describe()
df.info()
df.isna().sum()
df.dropna().shape[0]

# --- Missing-value imputation -------------------------------------------
# Results are assigned back to the frame rather than calling
# `df[col].fillna(..., inplace=True)`: that form mutates a temporary Series
# (chained assignment), which warns on recent pandas releases and leaves
# `df` untouched when Copy-on-Write is active.

# 1) Every float64 column gets its own mean.
for feature in df.columns:
    if df[feature].dtype == 'float64':
        df[feature] = df[feature].fillna(df[feature].mean())

# 2) Column-specific defaults, applied in the original order. The median of
#    'Skill Moves' is taken after step 1, exactly as before.
fill_defaults = {
    'Weight': '200lbs',
    'Contract Valid Until': 2019,
    'Height': "5'11",
    'Loaned From': 'None',
    'Joined': 'Jul 1, 2018',
    'Jersey Number': 8,
    'Body Type': 'Normal',
    'Position': 'ST',
    'Club': 'No Club',
    'Work Rate': 'Medium/ Medium',
    'Skill Moves': df['Skill Moves'].median(),
    'Weak Foot': 3,
    'Preferred Foot': 'Right',
    'International Reputation': 1,
    'Wage': '€200K',
}
for column, default in fill_defaults.items():
    df[column] = df[column].fillna(default)

# 3) Anything still missing becomes 0.
df.fillna(0, inplace=True)
def value_to_int(df_value):
    """Convert a currency string such as '€110.5M' or '€200K' to a number.

    The first character is treated as the currency symbol and discarded.
    A trailing 'M' multiplies by 1,000,000 and a trailing 'K' by 1,000;
    any other final character is kept as part of the number, so '€500'
    parses as 500.0 rather than losing its last digit.

    Parameters
    ----------
    df_value : str or number
        Cell value to convert. Numbers are returned unchanged, which also
        covers the 0 placeholder written by ``df.fillna(0)``.

    Returns
    -------
    float or int
        The parsed amount, or 0 when the text cannot be parsed.
    """
    if not isinstance(df_value, str):
        # Already numeric (or otherwise unparseable): never slice a non-string.
        return df_value if isinstance(df_value, (int, float)) else 0

    multipliers = {'M': 1000000, 'K': 1000}
    text = df_value.strip()
    multiplier = multipliers.get(text[-1:])

    # Only strip the last character when it really is a magnitude suffix.
    number_text = text[1:-1] if multiplier is not None else text[1:]
    try:
        value = float(number_text)
    except ValueError:
        return 0
    return value * multiplier if multiplier is not None else value
# Turn the two money columns from text ('€110.5M', '€200K') into numbers.
for money_column in ('Value', 'Wage'):
    df[money_column] = df[money_column].apply(value_to_int)
def weight_correction(df):
    """Convert a weight string such as '159lbs' to a float.

    Note that ``df`` here is a single cell value, not the DataFrame; the
    parameter name is kept for compatibility with existing callers.

    The last three characters are dropped and the remainder is parsed as
    a float.

    Returns
    -------
    float or int
        The parsed weight, or 0 when the value cannot be sliced or parsed.
    """
    try:
        return float(df[:-3])
    except (TypeError, ValueError):
        # TypeError: value is not sliceable (e.g. a number or None).
        # ValueError: the remaining text is not a number.
        return 0
# Replace the textual weights ('159lbs') with plain numbers.
df['Weight'] = df['Weight'].apply(weight_correction)
def _rounded_mean(data, columns):
    """Return the mean of ``columns`` in ``data`` rounded to an int.

    ``data`` may be a single row (Series) or a DataFrame. The second
    ``.mean()`` collapses the per-column means of a DataFrame; for a row
    the first ``.mean()`` already yields a scalar and the second leaves it
    unchanged.
    """
    return int(round(data[list(columns)].mean().mean()))


def defending(data):
    """Composite defending score: Marking, StandingTackle, SlidingTackle."""
    return _rounded_mean(data, ('Marking', 'StandingTackle', 'SlidingTackle'))


def general(data):
    """Composite general score: HeadingAccuracy, Dribbling, Curve, BallControl."""
    return _rounded_mean(
        data, ('HeadingAccuracy', 'Dribbling', 'Curve', 'BallControl'))


def mental(data):
    """Composite mental score: Aggression, Interceptions, Positioning,
    Vision, Composure."""
    return _rounded_mean(
        data,
        ('Aggression', 'Interceptions', 'Positioning', 'Vision', 'Composure'))


def passing(data):
    """Composite passing score: Crossing, ShortPassing, LongPassing."""
    return _rounded_mean(data, ('Crossing', 'ShortPassing', 'LongPassing'))


def mobility(data):
    """Composite mobility score: Acceleration, SprintSpeed, Agility, Reactions."""
    return _rounded_mean(
        data, ('Acceleration', 'SprintSpeed', 'Agility', 'Reactions'))


def power(data):
    """Composite power score: Balance, Jumping, Stamina, Strength."""
    return _rounded_mean(data, ('Balance', 'Jumping', 'Stamina', 'Strength'))


def rating(data):
    """Composite rating: Potential and Overall."""
    return _rounded_mean(data, ('Potential', 'Overall'))


def shooting(data):
    """Composite shooting score: Finishing, Volleys, FKAccuracy, ShotPower,
    LongShots, Penalties."""
    return _rounded_mean(
        data,
        ('Finishing', 'Volleys', 'FKAccuracy', 'ShotPower', 'LongShots',
         'Penalties'))
# Give the logo column an identifier-friendly name.
df.rename(columns={'Club Logo': 'Club_Logo'}, inplace=True)

# Derive the composite score columns row by row, in a fixed order.
score_builders = (
    ('Defending', defending),
    ('General', general),
    ('Mental', mental),
    ('Passing', passing),
    ('Mobility', mobility),
    ('Power', power),
    ('Rating', rating),
    ('Shooting', shooting),
)
for score_name, build_score in score_builders:
    df[score_name] = df.apply(build_score, axis=1)

# Compact view used by the ranking and ID-card cells: name, the eight
# composite scores, then the descriptive/image columns.
player_columns = ['Name']
player_columns += [score_name for score_name, _ in score_builders]
player_columns += ['Flag', 'Age', 'Nationality', 'Photo', 'Club_Logo', 'Club']
players = df[player_columns]
# Bar chart of how many players are listed for each position, most common
# first. The figure must exist before plotting: creating it afterwards leaves
# the chart at the default size and opens a second, empty figure.
plt.figure(figsize=(20, 10))
ax = sns.countplot(x='Position', data=df,
                   order=df['Position'].value_counts().index)
ax.set_title(label='Number of footballers available in each position',
             fontsize=20)
plt.show()
# Attributes considered when looking for each position's strongest skills.
player_features = (
    'Acceleration', 'Aggression', 'Agility',
    'Balance', 'BallControl', 'Composure',
    'Crossing', 'Dribbling', 'FKAccuracy',
    'Finishing', 'GKDiving', 'GKHandling',
    'GKKicking', 'GKPositioning', 'GKReflexes',
    'HeadingAccuracy', 'Interceptions', 'Jumping',
    'LongPassing', 'LongShots', 'Marking', 'Penalties'
)
from math import pi

# Mean of every feature per position. The column selection is passed as a
# list: selecting groupby columns with a tuple is rejected by pandas >= 2.0.
position_means = df.groupby(df['Position'])[list(player_features)].mean()

# Three radar charts per row; keep the original ten rows unless more
# positions than that need to be drawn.
n_rows = max(10, -(-len(position_means) // 3))

idx = 1
plt.figure(figsize=(15, 45))
for position_name, features in position_means.iterrows():
    # The five highest-scoring features become the axes of this radar chart.
    top_features = dict(features.nlargest(5))
    categories = list(top_features.keys())
    N = len(categories)

    # Repeat the first value (and angle) so the polygon closes.
    values = list(top_features.values())
    values += values[:1]
    angles = [n / float(N) * 2 * pi for n in range(N)]
    angles += angles[:1]

    # One polar subplot per position.
    ax = plt.subplot(n_rows, 3, idx, polar=True)

    # One spoke per feature, labelled with the feature name.
    plt.xticks(angles[:-1], categories, color='grey', size=8)

    # Radial grid labels.
    ax.set_rlabel_position(0)
    plt.yticks([25, 50, 75], ["25", "50", "75"], color="grey", size=7)
    plt.ylim(0, 100)

    # Outline and translucent fill.
    ax.plot(angles, values, linewidth=1, linestyle='solid')
    ax.fill(angles, values, 'b', alpha=0.1)

    plt.title(position_name, size=11, y=1.1)
    idx += 1

# Vertical spacing is a figure-wide setting, so apply it once.
plt.subplots_adjust(hspace=0.5)
# Histogram of the 'Special' score across all players.
sns.set(style='dark', palette='colorblind', color_codes=True)
plt.figure(figsize=(12, 8))
x = df['Special']
ax = sns.distplot(x, bins=50, kde=False, color='m')
ax.set_xlabel('Special score range', fontsize=16)
ax.set_ylabel('Count of the Players', fontsize=16)
ax.set_title('Histogram for the Speciality Scores of the Players',
             fontsize=20)
plt.show()
# 'Special' score against the 'Skill Moves' rating.
df.plot.scatter(x='Special', y='Skill Moves')

# Number of players per 'Skill Moves' rating, as a chart and as a table.
sns.countplot(x='Skill Moves', data=df)
df['Skill Moves'].value_counts()

plt.rcParams['figure.figsize'] = (20, 10)

# The five nationalities with the most players rated 5 for skill moves.
skill_df = df.loc[df['Skill Moves'] == 5, ['Name', 'Nationality']]
top_skill_nations = skill_df['Nationality'].value_counts().head(5).index
sns.countplot(x='Nationality', data=skill_df, order=top_skill_nations)

# The five most represented nationalities overall.
df['Nationality'].value_counts().nlargest(5).plot.bar()
# Weight distribution for players from five selected countries.
countries = ['England', 'Germany', 'Spain', 'Argentina', 'France']
in_selected_countries = df['Nationality'].isin(countries)
data_countries = df[in_selected_countries]

plt.rcParams['figure.figsize'] = (12, 7)
ax = sns.violinplot(x='Nationality', y='Weight', data=data_countries,
                    palette='colorblind')
ax.set_xlabel('Countries', fontsize=9)
ax.set_ylabel('Weight in lbs', fontsize=9)
ax.set_title('Distribution of Weight of players from different countries',
             fontsize=20)
import matplotlib.image as mpimg
import requests
def print_club_flag(index, club):
    """Print the 1-based rank of ``club`` and display its logo.

    The logo URL is read from the first row of the global ``df`` whose
    'Club' equals ``club``. The image is downloaded to
    'img_club_logo.jpg' in the working directory and shown in a
    1x1 inch figure.

    Parameters
    ----------
    index : int
        Zero-based rank; printed as ``index + 1``.
    club : str
        Club name as it appears in ``df['Club']``.

    Raises
    ------
    IndexError
        If no row of ``df`` belongs to ``club``.
    requests.HTTPError
        If the logo download returns an error status.
    """
    logo = df.loc[df['Club'] == club, 'Club_Logo'].iloc[0]
    logo_image = "img_club_logo.jpg"

    # Fail on a bad response instead of saving an error page as an image.
    response = requests.get(logo, timeout=30)
    response.raise_for_status()
    with open(logo_image, 'wb') as handler:
        handler.write(response.content)

    img = mpimg.imread(logo_image)
    print("%d. %s" % (index + 1, club))

    # Create the figure before drawing so the requested size applies to the
    # image itself and no empty extra figure is produced.
    plt.figure(figsize=(1, 1))
    plt.imshow(img, interpolation="lanczos")
    plt.show()
def print_national_flag(index, nation):
    """Print the 1-based rank of ``nation`` and display its flag.

    The flag URL is read from the first row of the global ``df`` whose
    'Nationality' equals ``nation``. The image is downloaded to
    'img_nation_logo.jpg' in the working directory and shown in a
    1x1 inch figure.

    Parameters
    ----------
    index : int
        Zero-based rank; printed as ``index + 1``.
    nation : str
        Nationality as it appears in ``df['Nationality']``.

    Raises
    ------
    IndexError
        If no row of ``df`` has this nationality.
    requests.HTTPError
        If the flag download returns an error status.
    """
    logo = df.loc[df['Nationality'] == nation, 'Flag'].iloc[0]
    logo_image = "img_nation_logo.jpg"

    # Fail on a bad response instead of saving an error page as an image.
    response = requests.get(logo, timeout=30)
    response.raise_for_status()
    with open(logo_image, 'wb') as handler:
        handler.write(response.content)

    img = mpimg.imread(logo_image)
    # Label with the nation being shown (previously this printed whatever
    # the global `club` happened to hold).
    print("%d. %s" % (index + 1, nation))

    # Create the figure before drawing so the requested size applies to the
    # image itself and no empty extra figure is produced.
    plt.figure(figsize=(1, 1))
    plt.imshow(img, interpolation="lanczos")
    plt.show()
# --- Top five clubs by mean 'Overall' rating -----------------------------
d = {'Overall': 'Average_Rating'}
best_overall_club_df = df.groupby('Club')[['Overall']].mean().rename(columns=d)
clubs = best_overall_club_df['Average_Rating'].nlargest(5).index
for index, club in enumerate(clubs):
    print_club_flag(index, club)

# --- Top five clubs by summed attacking scores ---------------------------
attck_list = ['Shooting', 'Power', 'Passing']
club_attack_totals = players.groupby('Club')[attck_list].sum()
best_attack_df = club_attack_totals.sum(axis=1)
clubs = best_attack_df.nlargest(5).index
for index, club in enumerate(clubs):
    print_club_flag(index, club)

# --- Top five clubs by summed 'Defending' score --------------------------
best_defense_df = players.groupby('Club')['Defending'].sum()
clubs = best_defense_df.nlargest(5).index
for index, club in enumerate(clubs):
    print_club_flag(index, club)
# --- Top three nations by mean 'Overall' rating --------------------------
d = {'Overall': 'Average_Rating'}
best_overall_country_df = (
    df.groupby('Nationality')[['Overall']].mean().rename(columns=d)
)
nations = best_overall_country_df['Average_Rating'].nlargest(3).index
for index, nation in enumerate(nations):
    print_national_flag(index, nation)

# --- Three highest 'Overall' ratings among United Arab Emirates players --
uae_df = df[df['Nationality'] == 'United Arab Emirates']
best_3_uae = uae_df['Overall'].nlargest(3)
print(best_3_uae)
# Names of every UAE player whose rating is one of those values.
uae_df.loc[uae_df['Overall'].isin(best_3_uae), 'Name']

# --- Top five nations by summed attacking scores -------------------------
nation_attack_totals = players.groupby('Nationality')[attck_list].sum()
best_attack_nation_df = nation_attack_totals.sum(axis=1)
nations = best_attack_nation_df.nlargest(5).index
for index, nation in enumerate(nations):
    print_national_flag(index, nation)

# --- Top five nations by summed 'Defending' score ------------------------
best_defense_nation_df = players.groupby('Nationality')['Defending'].sum()
nations = best_defense_nation_df.nlargest(5).index
for index, nation in enumerate(nations):
    print_national_flag(index, nation)
import requests
import random
from math import pi
import matplotlib.image as mpimg
from matplotlib.offsetbox import (OffsetImage,AnnotationBbox)
def _fetch_image(url, path):
    """Download ``url`` and write the raw bytes to ``path``."""
    response = requests.get(url, timeout=30)
    # Fail here rather than saving an error page and crashing in imread.
    response.raise_for_status()
    with open(path, 'wb') as handler:
        handler.write(response.content)


def details(row, title, image, age, nationality, photo, logo, club):
    """Draw a player "ID card": a radar chart of the composite scores over
    the national flag, with the player photo, club logo and text labels.

    Parameters
    ----------
    row : index label
        Label of the player's row in the global ``players`` frame.
    title : str
        Figure title (the player's name at the call site in this file).
    image : str
        URL of the flag image used as the figure background.
    age : int
        Shown in the 'Age' label.
    nationality : str
        Shown upper-cased in the 'Nationality' label.
    photo : str
        URL of the player photo.
    logo : str
        URL of the club logo.
    club : str
        Shown upper-cased in the 'Team' label.

    Side effects
    ------------
    Writes 'img_flag.jpg', 'img_player.jpg' and 'img_club_logo.jpg' to the
    working directory and creates a new matplotlib figure.

    Raises
    ------
    requests.HTTPError
        If any of the three downloads returns an error status.
    """
    flag_image = "img_flag.jpg"
    player_image = "img_player.jpg"
    logo_image = "img_club_logo.jpg"
    _fetch_image(image, flag_image)
    _fetch_image(photo, player_image)
    _fetch_image(logo, logo_image)

    # Random fill colour; pure white is swapped for a visible green. The hex
    # digits are upper-case, so the comparison must be upper-case as well.
    color_random = '#%02X%02X%02X' % tuple(
        random.randint(0, 255) for _ in range(3))
    if color_random == '#FFFFFF':
        color_random = '#a5d6a7'
    basic_color = '#37474f'
    color_annotate = '#01579b'

    img = mpimg.imread(flag_image)
    plt.figure(figsize=(15, 8))

    # Only the score columns are plotted. Select them by name: filtering the
    # row by value would also drop any score that happens to equal the age.
    non_graph_columns = ('Name', 'Flag', 'Age', 'Nationality', 'Photo',
                         'Club_Logo', 'Club')
    categories = [column for column in players.columns
                  if column not in non_graph_columns]
    N = len(categories)

    # Evenly spaced spokes; the first angle is repeated to close the polygon.
    angles = [n / float(N) * 2 * pi for n in range(N)]
    angles += angles[:1]

    ax = plt.subplot(111, projection='polar')
    # First spoke at the top, remaining spokes clockwise.
    ax.set_theta_offset(pi / 2)
    ax.set_theta_direction(-1)

    # One label per spoke (label count must match tick count).
    plt.xticks(angles[:-1], categories, color='black', size=17)

    ax.set_rlabel_position(0)
    plt.yticks([25, 50, 75, 100], ["25", "50", "75", "100"],
               color=basic_color, size=10)
    plt.ylim(0, 100)

    values = players.loc[row, categories].tolist()
    values += values[:1]

    ax.plot(angles, values, color=basic_color, linewidth=1, linestyle='solid')
    ax.fill(angles, values, color=color_random, alpha=0.5)

    # Flag as a semi-transparent, full-figure background behind the chart.
    axes_coords = [0, 0, 1, 1]
    ax_image = plt.gcf().add_axes(axes_coords, zorder=-1)
    ax_image.imshow(img, alpha=0.5)
    ax_image.axis('off')

    ax.annotate('Nationality: ' + nationality.upper(), xy=(10, 10),
                xytext=(103, 138),
                fontsize=12,
                color='white',
                bbox={'facecolor': color_annotate, 'pad': 7})

    ax.annotate('Age: ' + str(age), xy=(10, 10), xytext=(43, 180),
                fontsize=15,
                color='white',
                bbox={'facecolor': color_annotate, 'pad': 7})

    ax.annotate('Team: ' + club.upper(), xy=(10, 10), xytext=(92, 168),
                fontsize=12,
                color='white',
                bbox={'facecolor': color_annotate, 'pad': 7})

    # Player photo, offset from the anchor point by xybox (in points).
    arr_img_player = plt.imread(player_image, format='jpg')
    imagebox_player = OffsetImage(arr_img_player)
    imagebox_player.image.axes = ax
    abPlayer = AnnotationBbox(imagebox_player, (0.5, 0.7),
                              xybox=(313, 223),
                              xycoords='data',
                              boxcoords="offset points")

    # Club logo, offset in the opposite direction.
    arr_img_logo = plt.imread(logo_image, format='jpg')
    imagebox_logo = OffsetImage(arr_img_logo)
    imagebox_logo.image.axes = ax
    abLogo = AnnotationBbox(imagebox_logo, (0.5, 0.7),
                            xybox=(-320, -226),
                            xycoords='data',
                            boxcoords="offset points")

    ax.add_artist(abPlayer)
    ax.add_artist(abLogo)

    plt.title(title, size=50, color=basic_color)
# defining a polar graph
def get_id_card(id = 0):
    """Draw the ID card of the player at position ``id`` in ``players``.

    Parameters
    ----------
    id : int, default 0
        Zero-based row position in the global ``players`` frame. Out-of-range
        values print a hint with the valid range instead of raising.
    """
    total_players = len(players)
    if 0 <= id < total_players:
        # Read every field positionally so the lookup agrees with
        # `players.index[id]` even when the index is not 0..n-1.
        player = players.iloc[id]
        details(row = players.index[id],
                title = player['Name'],
                age = player['Age'],
                photo = player['Photo'],
                nationality = player['Nationality'],
                image = player['Flag'],
                logo = player['Club_Logo'],
                club = player['Club'])
    else:
        # Report the real size of the data set and its last valid position.
        print('The base has %d players. You can put positive numbers from 0 to %d'
              % (total_players, total_players - 1))
# ID cards for the five players with the highest 'Overall' rating.
best_footballers = df['Overall'].nlargest(5)
for index in best_footballers.index:
    get_id_card(index)

# For each position, the player with the highest 'Potential'.
best_potential_rows = df.groupby('Position')['Potential'].idxmax()
summary_columns = ['Name', 'Position', 'Overall', 'Age', 'Nationality', 'Club']
df.loc[best_potential_rows][summary_columns]
# Histogram of player wages (numeric after the earlier conversion step).
sns.set(style='dark', palette='colorblind', color_codes=True)
plt.figure(figsize=(12, 8))
x = df['Wage']
ax = sns.distplot(x, bins=50, kde=False, color='m')
ax.set_xlabel('Player Wage', fontsize=16)
ax.set_ylabel('Player Count', fontsize=16)
ax.set_title('Histogram that shows the wage of the Players', fontsize=20)
plt.show()